<!DOCTYPE html>
<html lang="si">
<head>
    <meta http-equiv="content-type" content="text/html;charset=utf-8"/>
    <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
    <meta name="description" content="ට්රාන්ස්ෆෝමර්-එක්ස්එල් ආකෘතියේ පැහැදිලි කිරීම් සමඟ ලේඛනගත කිරීම."/>

    <meta name="twitter:card" content="summary"/>
    <meta name="twitter:image:src" content="https://avatars1.githubusercontent.com/u/64068543?s=400&amp;v=4"/>
    <meta name="twitter:title" content="ට්රාන්ස්ෆෝමර් XL"/>
    <meta name="twitter:description" content="ට්රාන්ස්ෆෝමර්-එක්ස්එල් ආකෘතියේ පැහැදිලි කිරීම් සමඟ ලේඛනගත කිරීම."/>
    <meta name="twitter:site" content="@labmlai"/>
    <meta name="twitter:creator" content="@labmlai"/>

    <meta property="og:url" content="https://nn.labml.ai/transformers/xl/index.html"/>
    <meta property="og:title" content="ට්රාන්ස්ෆෝමර් XL"/>
    <meta property="og:image" content="https://avatars1.githubusercontent.com/u/64068543?s=400&amp;v=4"/>
    <meta property="og:site_name" content="ට්රාන්ස්ෆෝමර් XL"/>
    <meta property="og:type" content="object"/>
    <meta property="og:title" content="ට්රාන්ස්ෆෝමර් XL"/>
    <meta property="og:description" content="ට්රාන්ස්ෆෝමර්-එක්ස්එල් ආකෘතියේ පැහැදිලි කිරීම් සමඟ ලේඛනගත කිරීම."/>

    <title>ට්රාන්ස්ෆෝමර් XL</title>
    <link rel="shortcut icon" href="/icon.png"/>
    <link rel="stylesheet" href="../../pylit.css?v=1">
    <link rel="canonical" href="https://nn.labml.ai/transformers/xl/index.html"/>
    <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/katex@0.13.18/dist/katex.min.css" integrity="sha384-zTROYFVGOfTw7JV7KUu8udsvW2fx4lWOsCEDqhBreBwlHI4ioVRtmIvEThzJHGET" crossorigin="anonymous">

    <!-- Global site tag (gtag.js) - Google Analytics -->
    <script async src="https://www.googletagmanager.com/gtag/js?id=G-4V3HC8HBLH"></script>
    <script>
        // Google Analytics bootstrap: make sure the shared queue exists, reusing
        // any array an earlier script already stored on window.dataLayer.
        window.dataLayer = window.dataLayer || [];

        /**
         * Queue a command by pushing this call's `arguments` object onto dataLayer.
         * NOTE(review): the `arguments` object itself is pushed, not a copied array;
         * presumably the asynchronously loaded gtag.js relies on that. Confirm
         * before refactoring to rest parameters.
         */
        function gtag() {
            dataLayer.push(arguments);
        }

        // Queue a 'js' command carrying the current Date.
        gtag('js', new Date());

        // Queue a 'config' command for the G-4V3HC8HBLH ID (the same ID appears in
        // the gtag.js loader URL of the preceding script tag).
        gtag('config', 'G-4V3HC8HBLH');
    </script>
</head>
<body>
<div id='container'>
    <div id="background"></div>
    <div class='section'>
        <div class='docs'>
            <p>
                <a class="parent" href="/">home</a>
                <a class="parent" href="../index.html">transformers</a>
                <a class="parent" href="index.html">xl</a>
            </p>
            <p>
                <a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations" target="_blank">
                    <img alt="Github"
                         src="https://img.shields.io/github/stars/labmlai/annotated_deep_learning_paper_implementations?style=social"
                         style="max-width:100%;"/></a>
                <a href="https://twitter.com/labmlai" rel="nofollow" target="_blank">
                    <img alt="Twitter"
                         src="https://img.shields.io/twitter/follow/labmlai?style=social"
                         style="max-width:100%;"/></a>
            </p>
            <p>
                <a href="https://github.com/labmlai/annotated_deep_learning_paper_implementations/tree/master/labml_nn/transformers/xl/__init__.py" target="_blank">
                    View code on Github</a>
            </p>
        </div>
    </div>
    <div class='section' id='section-0'>
        <div class='docs doc-strings'>
            <div class='section-link'>
                <a href='#section-0'>#</a>
            </div>
            <h1>ට්රාන්ස්ෆෝමර් XL</h1>
<p>මෙය <a href="https://pytorch.org">PyTorch</a> හි <a href="https://papers.labml.ai/paper/1901.02860">ට්රාන්ස්ෆෝමර්-එක්ස්එල්: ස්ථාවර දිග සන්දර්භයකින් ඔබ්බට අවධානය යොමු කරන භාෂා ආකෘති</a> ක්රියාත්මක කිරීමකි. </p>
<p>ට්රාන්ස්ෆෝමර්සමාන්තරව පුහුණු කරන ලද අනුක්රමයේ දිගට සමාන සීමිත අවධානයක් ඇත. මෙම සියලු තනතුරු ස්ථාවර ස්ථානීය කේතන ඇත. ට්රාන්ස්ෆෝමර් එක්ස්එල් මෙම අවධානය පරතරය වැඩි කරන්නේ එක් එක් තනතුරු පූර්ව ගණනය කළ අතීත කාවැද්දීම් කෙරෙහි අවධානය යොමු කිරීමට ඉඩ දීමෙනි. නිදසුනක් ලෙස සන්දර්භය දිග නම් <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.69444em;vertical-align:0em;"></span><span class="mord coloredeq eqa" style=""><span class="mord mathnormal" style="margin-right:0.01968em">l</span></span></span></span></span></span>, එය පෙර දිග <span ><span class="katex"><span aria-hidden="true" class="katex-html"><span class="base"><span class="strut" style="height:0.69444em;vertical-align:0em;"></span><span class="mord coloredeq eqa" style=""><span class="mord mathnormal" style="margin-right:0.01968em">l</span></span></span></span></span></span> කණ්ඩායම සඳහා සියලු ස්ථරවල කාවැද්දීම් තබා ඒවා වර්තමාන පියවරට පෝෂණය කරනු ඇත. අපි ස්ථාවර ස්ථානීය කේතීකරණ භාවිතා කරන්නේ නම් මෙම පූර්ව ගණනය කරන ලද කාවැද්දීම් වර්තමාන සන්දර්භයට සමාන ස්ථාන ඇත. ඔවුන් සාපේක්ෂ ස්ථානීය කේතන ක්රමයක් හඳුන්වා දෙන අතර එහිදී අවධානය ගණනය කිරීමේදී ස්ථානීය කේතීකරණ හඳුන්වා දෙනු ලැබේ. </p>
<p>සාපේක්ෂබහු ශීර්ෂ අවධානය යොමු කිරීම ක්රියාත්මක කිරීම ක්රියාත්මක වේ <a href="relative_mha.html"><code  class="highlight"><span></span><span class="n">relative_mha</span><span class="o">.</span><span class="n">py</span></code>
</a>. </p>
<p>මෙන්නකුඩා ෂේක්ස්පියර් දත්ත කට්ටලය පිළිබඳ ට්රාන්ස්ෆෝමර් එක්ස්එල් ආකෘතියක් පුහුණු කිරීම සඳහා පුහුණු <a href="experiment.html">කේතය</a> සහ සටහන් පොතක්. </p>
<p><a href="https://colab.research.google.com/github/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/transformers/xl/experiment.ipynb"><img alt="Open In Colab" src="https://colab.research.google.com/assets/colab-badge.svg"></a> <a href="https://app.labml.ai/run/d3b6760c692e11ebb6a70242ac1c0002"> <img alt="View Run" src="https://img.shields.io/badge/labml-experiment-brightgreen"></a></p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">36</span><span></span><span class="kn">from</span> <span class="nn">typing</span> <span class="kn">import</span> <span class="n">List</span><span class="p">,</span> <span class="n">Optional</span>
<span class="lineno">37</span>
<span class="lineno">38</span><span class="kn">import</span> <span class="nn">torch</span>
<span class="lineno">39</span><span class="kn">import</span> <span class="nn">torch.nn</span> <span class="k">as</span> <span class="nn">nn</span>
<span class="lineno">40</span>
<span class="lineno">41</span><span class="kn">from</span> <span class="nn">labml_helpers.module</span> <span class="kn">import</span> <span class="n">Module</span>
<span class="lineno">42</span><span class="kn">from</span> <span class="nn">labml_nn.utils</span> <span class="kn">import</span> <span class="n">clone_module_list</span>
<span class="lineno">43</span><span class="kn">from</span> <span class="nn">.relative_mha</span> <span class="kn">import</span> <span class="n">RelativeMultiHeadAttention</span>
<span class="lineno">44</span><span class="kn">from</span> <span class="nn">..feed_forward</span> <span class="kn">import</span> <span class="n">FeedForward</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-1'>
        <div class='docs doc-strings'>
            <div class='section-link'>
                <a href='#section-1'>#</a>
            </div>
            <h2>ට්රාන්ස්ෆෝමර් XL ස්ථරය</h2>
<p>ට්රාන්ස්ෆෝමර් එක්ස්එල් ආකෘතිය මෙම ස්ථර ගණනාවකින් සමන්විත වේ. </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">47</span><span class="k">class</span> <span class="nc">TransformerXLLayer</span><span class="p">(</span><span class="n">Module</span><span class="p">):</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-2'>
        <div class='docs doc-strings'>
            <div class='section-link'>
                <a href='#section-2'>#</a>
            </div>
            <ul><li><code  class="highlight"><span></span><span class="n">d_model</span></code>
 ටෝකනය කාවැද්දීමේ ප්රමාණයයි </li>
<li><code  class="highlight"><span></span><span class="n">self_attn</span></code>
 <a href="relative_mha.html">ස්වයං අවධානය මොඩියුලය</a> </li>
<li><code  class="highlight"><span></span><span class="n">feed_forward</span></code>
 යනු ආහාර ඉදිරි මොඩියුලයයි </li>
<li><code  class="highlight"><span></span><span class="n">dropout_prob</span></code>
 ස්වයං අවධානයෙන් පසු ඉවත් වීමේ සම්භාවිතාව සහ FFN</li></ul>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">53</span>    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span>
<span class="lineno">54</span>                 <span class="n">d_model</span><span class="p">:</span> <span class="nb">int</span><span class="p">,</span>
<span class="lineno">55</span>                 <span class="n">self_attn</span><span class="p">:</span> <span class="n">RelativeMultiHeadAttention</span><span class="p">,</span>
<span class="lineno">56</span>                 <span class="n">feed_forward</span><span class="p">:</span> <span class="n">FeedForward</span><span class="p">,</span>
<span class="lineno">57</span>                 <span class="n">dropout_prob</span><span class="p">:</span> <span class="nb">float</span><span class="p">):</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-3'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-3'>#</a>
            </div>
            
        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">64</span>        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span>
<span class="lineno">65</span>        <span class="bp">self</span><span class="o">.</span><span class="n">size</span> <span class="o">=</span> <span class="n">d_model</span>
<span class="lineno">66</span>        <span class="bp">self</span><span class="o">.</span><span class="n">self_attn</span> <span class="o">=</span> <span class="n">self_attn</span>
<span class="lineno">67</span>        <span class="bp">self</span><span class="o">.</span><span class="n">feed_forward</span> <span class="o">=</span> <span class="n">feed_forward</span>
<span class="lineno">68</span>        <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">Dropout</span><span class="p">(</span><span class="n">dropout_prob</span><span class="p">)</span>
<span class="lineno">69</span>        <span class="bp">self</span><span class="o">.</span><span class="n">norm_self_attn</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">d_model</span><span class="p">])</span>
<span class="lineno">70</span>        <span class="bp">self</span><span class="o">.</span><span class="n">norm_ff</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">d_model</span><span class="p">])</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-4'>
        <div class='docs doc-strings'>
            <div class='section-link'>
                <a href='#section-4'>#</a>
            </div>
            <ul><li><code  class="highlight"><span></span><span class="n">x</span></code>
 ටෝකන් මට්ටමේ ලක්ෂණය හැඩය දෛශික ක tensor වේ <code  class="highlight"><span></span><span class="p">[</span><span class="n">seq_len</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">d_model</span><span class="p">]</span></code>
 </li>
<li><code  class="highlight"><span></span><span class="n">mem</span></code>
 යනු අතීත ටෝකන් මට්ටමේ විශේෂාංගයේ හැඩයේ දෛශිකවල ටෙන්සරයකි <code  class="highlight"><span></span><span class="p">[</span><span class="n">mem_len</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">d_model</span><span class="p">]</span></code>
 </li>
<li><code  class="highlight"><span></span><span class="n">mask</span></code>
 යනු හැඩයේ අනුකෘතියක් <code  class="highlight"><span></span><span class="p">[</span><span class="n">seq_len</span><span class="p">,</span> <span class="n">mem_len</span> <span class="o">+</span> <span class="n">seq_len</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">]</span></code>
 හෝ <code  class="highlight"><span></span><span class="p">[</span><span class="n">seq_len</span><span class="p">,</span> <span class="n">mem_len</span> <span class="o">+</span> <span class="n">seq_len</span><span class="p">,</span> <span class="mi">1</span><span class="p">]</span></code>
. <code  class="highlight"><span></span><span class="n">i</span></code>
 හි ටෝකනයට <code  class="highlight"><span></span><span class="n">j</span></code>
 හි ටෝකනය දැකිය හැකි නම් <code  class="highlight"><span></span><span class="n">mask</span><span class="p">[</span><span class="n">i</span><span class="p">,</span> <span class="n">j</span><span class="p">]</span></code>
 සත්ය වේ. </li></ul>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">72</span>    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">*</span><span class="p">,</span>
<span class="lineno">73</span>                <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span>
<span class="lineno">74</span>                <span class="n">mem</span><span class="p">:</span> <span class="n">Optional</span><span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span>
<span class="lineno">75</span>                <span class="n">mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-5'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-5'>#</a>
            </div>
            <p>ස්වයං අවධානය යොමු කිරීමට පෙර දෛශික සාමාන්යකරණය කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">83</span>        <span class="n">z</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_self_attn</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-6'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-6'>#</a>
            </div>
            <p>මතකයක් තිබේ නම් </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">85</span>        <span class="k">if</span> <span class="n">mem</span> <span class="ow">is</span> <span class="ow">not</span> <span class="kc">None</span><span class="p">:</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-7'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-7'>#</a>
            </div>
            <p>එය සාමාන්යකරණය කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">87</span>            <span class="n">mem</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_self_attn</span><span class="p">(</span><span class="n">mem</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-8'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-8'>#</a>
            </div>
            <p><code  class="highlight"><span></span><span class="n">z</span></code>
 සමඟ ඒකාබද්ධ කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">89</span>            <span class="n">m_z</span> <span class="o">=</span> <span class="n">torch</span><span class="o">.</span><span class="n">cat</span><span class="p">((</span><span class="n">mem</span><span class="p">,</span> <span class="n">z</span><span class="p">),</span> <span class="n">dim</span><span class="o">=</span><span class="mi">0</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-9'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-9'>#</a>
            </div>
            <p>මතකයක් නොමැති නම් නොසලකා හරින්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">91</span>        <span class="k">else</span><span class="p">:</span>
<span class="lineno">92</span>            <span class="n">m_z</span> <span class="o">=</span> <span class="n">z</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-10'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-10'>#</a>
            </div>
            <p>අවධානය </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">94</span>        <span class="n">self_attn</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">self_attn</span><span class="p">(</span><span class="n">query</span><span class="o">=</span><span class="n">z</span><span class="p">,</span> <span class="n">key</span><span class="o">=</span><span class="n">m_z</span><span class="p">,</span> <span class="n">value</span><span class="o">=</span><span class="n">m_z</span><span class="p">,</span> <span class="n">mask</span><span class="o">=</span><span class="n">mask</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-11'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-11'>#</a>
            </div>
            <p>අවධානය යොමු ප්රතිඵල එකතු කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">96</span>        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">self_attn</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-12'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-12'>#</a>
            </div>
            <p>පෝෂණයසඳහා සාමාන්යකරණය කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">99</span>        <span class="n">z</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm_ff</span><span class="p">(</span><span class="n">x</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-13'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-13'>#</a>
            </div>
            <p>Feed-forward ජාලය හරහා ගමන් කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">101</span>        <span class="n">ff</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">feed_forward</span><span class="p">(</span><span class="n">z</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-14'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-14'>#</a>
            </div>
            <p>Feed-forward ප්රතිඵල නැවත එක් කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">103</span>        <span class="n">x</span> <span class="o">=</span> <span class="n">x</span> <span class="o">+</span> <span class="bp">self</span><span class="o">.</span><span class="n">dropout</span><span class="p">(</span><span class="n">ff</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-15'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-15'>#</a>
            </div>
            <p> </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">106</span>        <span class="k">return</span> <span class="n">x</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-16'>
        <div class='docs doc-strings'>
            <div class='section-link'>
                <a href='#section-16'>#</a>
            </div>
            <h2>ට්රාන්ස්ෆෝමර් XL ආකෘතිය</h2>
<p>මෙය බහු ට්රාන්ස්ෆෝමර් එක්ස්එල් ස්ථර වලින් සමන්විත වේ</p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">109</span><span class="k">class</span> <span class="nc">TransformerXL</span><span class="p">(</span><span class="n">Module</span><span class="p">):</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-17'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-17'>#</a>
            </div>
            
        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">116</span>    <span class="k">def</span> <span class="fm">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">layer</span><span class="p">:</span> <span class="n">TransformerXLLayer</span><span class="p">,</span> <span class="n">n_layers</span><span class="p">:</span> <span class="nb">int</span><span class="p">):</span>
<span class="lineno">117</span>        <span class="nb">super</span><span class="p">()</span><span class="o">.</span><span class="fm">__init__</span><span class="p">()</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-18'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-18'>#</a>
            </div>
            <p>ට්රාන්ස්ෆෝමර් ස්ථරයේ පිටපත් සාදන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">119</span>        <span class="bp">self</span><span class="o">.</span><span class="n">layers</span> <span class="o">=</span> <span class="n">clone_module_list</span><span class="p">(</span><span class="n">layer</span><span class="p">,</span> <span class="n">n_layers</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-19'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-19'>#</a>
            </div>
            <p>අවසාන සාමාන්යකරණ ස්තරය </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">121</span>        <span class="bp">self</span><span class="o">.</span><span class="n">norm</span> <span class="o">=</span> <span class="n">nn</span><span class="o">.</span><span class="n">LayerNorm</span><span class="p">([</span><span class="n">layer</span><span class="o">.</span><span class="n">size</span><span class="p">])</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-20'>
        <div class='docs doc-strings'>
            <div class='section-link'>
                <a href='#section-20'>#</a>
            </div>
            <ul><li><code  class="highlight"><span></span><span class="n">x</span></code>
 යනු හැඩයේ ටෝකන් කාවැද්දීමේ දෛශිකවල ටෙන්සරයකි <code  class="highlight"><span></span><span class="p">[</span><span class="n">seq_len</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">d_model</span><span class="p">]</span></code>
 </li>
<li><code  class="highlight"><span></span><span class="n">mem</span></code>
 යනු එක් එක් ස්තරය සඳහා අතීත ටෝකන් මට්ටමේ විශේෂාංග දෛශිකවල, <code  class="highlight"><span></span><span class="p">[</span><span class="n">mem_len</span><span class="p">,</span> <span class="n">batch_size</span><span class="p">,</span> <span class="n">d_model</span><span class="p">]</span></code>
 හැඩයේ ටෙන්සර ලැයිස්තුවකි </li>
<li><code  class="highlight"><span></span><span class="n">mask</span></code>
 ආවරණ අනුකෘතිය වේ</li></ul>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">123</span>    <span class="k">def</span> <span class="nf">forward</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">x</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">,</span> <span class="n">mem</span><span class="p">:</span> <span class="n">List</span><span class="p">[</span><span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">],</span> <span class="n">mask</span><span class="p">:</span> <span class="n">torch</span><span class="o">.</span><span class="n">Tensor</span><span class="p">):</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-21'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-21'>#</a>
            </div>
            <p>ටෝකන්මට්ටමේ විශේෂාංග දෛශික ගබඩා කිරීම සඳහා ලැයිස්තු ගත කරන්න, එය ඊළඟ අනුක්රමික කණ්ඩායම සඳහා මතකයන් බවට පත්වනු ඇත. </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">132</span>        <span class="n">new_mem</span> <span class="o">=</span> <span class="p">[]</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-22'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-22'>#</a>
            </div>
            <p>එක් එක් ට්රාන්ස්ෆෝමර් ස්ථරය හරහා ධාවනය කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">134</span>        <span class="k">for</span> <span class="n">i</span><span class="p">,</span> <span class="n">layer</span> <span class="ow">in</span> <span class="nb">enumerate</span><span class="p">(</span><span class="bp">self</span><span class="o">.</span><span class="n">layers</span><span class="p">):</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-23'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-23'>#</a>
            </div>
            <p>විශේෂාංග දෛශික ලැයිස්තුවට එක් කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">136</span>            <span class="n">new_mem</span><span class="o">.</span><span class="n">append</span><span class="p">(</span><span class="n">x</span><span class="o">.</span><span class="n">detach</span><span class="p">())</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-24'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-24'>#</a>
            </div>
            <p>මතකය </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">138</span>            <span class="n">m</span> <span class="o">=</span> <span class="n">mem</span><span class="p">[</span><span class="n">i</span><span class="p">]</span> <span class="k">if</span> <span class="n">mem</span> <span class="k">else</span> <span class="kc">None</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-25'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-25'>#</a>
            </div>
            <p>ට්රාන්ස්ෆෝමර් XL ස්තරය හරහා ධාවනය කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">140</span>            <span class="n">x</span> <span class="o">=</span> <span class="n">layer</span><span class="p">(</span><span class="n">x</span><span class="o">=</span><span class="n">x</span><span class="p">,</span> <span class="n">mem</span><span class="o">=</span><span class="n">m</span><span class="p">,</span> <span class="n">mask</span><span class="o">=</span><span class="n">mask</span><span class="p">)</span></pre></div>
        </div>
    </div>
    <div class='section' id='section-26'>
        <div class='docs'>
            <div class='section-link'>
                <a href='#section-26'>#</a>
            </div>
            <p>අවසාන වශයෙන්, දෛශික සාමාන්යකරණය කරන්න </p>

        </div>
        <div class='code'>
            <div class="highlight"><pre><span class="lineno">142</span>        <span class="k">return</span> <span class="bp">self</span><span class="o">.</span><span class="n">norm</span><span class="p">(</span><span class="n">x</span><span class="p">),</span> <span class="n">new_mem</span></pre></div>
        </div>
    </div>
    <div class='footer'>
        <a href="https://papers.labml.ai">Trending Research Papers</a>
        <a href="https://labml.ai">labml.ai</a>
    </div>
</div>
<script src="../../interactive.js?v=1"></script>
<script>
    /**
     * Find every image that is a direct child of a paragraph and hand each one
     * to handleImage.
     */
    function handleImages() {
        var paragraphImages = document.querySelectorAll('p>img')

        Array.prototype.forEach.call(paragraphImages, function (image) {
            handleImage(image)
        })
    }

    /**
     * Centre an image within its parent and make it open an enlarged copy in a
     * modal when clicked.
     *
     * The modal (a div with id "modal" holding a content wrapper, the enlarged
     * image and an "x" close control) is built once per image and is attached to
     * document.body only while it is open.
     *
     * @param {HTMLImageElement} img image to enhance; its `src`, `alt` and
     *     `parentElement` are read, and its `onclick` handler is replaced.
     */
    function handleImage(img) {
        img.parentElement.style.textAlign = 'center'

        var modal = document.createElement('div')
        modal.id = 'modal'

        var modalContent = document.createElement('div')
        modal.appendChild(modalContent)

        var modalImage = document.createElement('img')
        modalContent.appendChild(modalImage)

        var closeControl = document.createElement('span')
        closeControl.classList.add('close')
        closeControl.textContent = 'x'
        modal.appendChild(closeControl)

        img.onclick = function () {
            document.body.appendChild(modal)
            // Carry the alternative text over so the enlarged copy is not an unlabeled image.
            modalImage.alt = img.alt || ''
            modalImage.src = img.src
        }

        closeControl.onclick = function () {
            // Detach only while attached: removeChild throws for a node that is not a child.
            if (modal.parentNode) {
                modal.parentNode.removeChild(modal)
            }
        }
    }

    // Enhance the matching images now; this script is placed at the end of the
    // body, after the page content it queries.
    handleImages()
</script>
</body>
</html>